# This script contains the code for the analysis that underpins the first article I wrote on linkedin
# Link to code

# load libraries - these are very standard R libraries see https://www.tidyverse.org/
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(readxl)

# load the data - downloaded from https://www.gridwatch.templar.co.uk/download.php
# filter by date so that subsequent analysis is consistent
dat <- readr::read_csv('~/Downloads/gridwatch.csv') %>%
  filter(timestamp < '2022-05-12', timestamp > '2012-01-01')
## Rows: 1150600 Columns: 24
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl  (23): id, demand, frequency, coal, nuclear, ccgt, wind, pumped, hydro, ...
## dttm  (1): timestamp
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# check out the data - one record every 5 minutes, columns for each data source, 1.08M rows
dat
## # A tibble: 1,087,726 × 24
##       id timestamp           demand frequency  coal nuclear  ccgt  wind pumped
##    <dbl> <dttm>               <dbl>     <dbl> <dbl>   <dbl> <dbl> <dbl>  <dbl>
##  1 62694 2012-01-01 00:00:01  30590      50.1  8693    7121  8568  2740      0
##  2 62695 2012-01-01 00:05:06  30490      50.0  8650    7120  8441  2812      0
##  3 62696 2012-01-01 00:10:01  30802      50    8880    7125  8427  2896      0
##  4 62697 2012-01-01 00:15:01  31180      50.0  9111    7122  8494  2964      0
##  5 62698 2012-01-01 00:20:01  31241      50.0  9195    7114  8449  2992      0
##  6 62699 2012-01-01 00:25:10  31340      50.0  9270    7090  8449  3040      0
##  7 62700 2012-01-01 00:30:01  31255      50.0  9164    7060  8490  3050      0
##  8 62701 2012-01-01 00:35:05  31054      50.1  8999    7034  8471  3059      0
##  9 62702 2012-01-01 00:40:14  31166      50.0  8887    7004  8426  3085    271
## 10 62703 2012-01-01 00:45:01  31232      50.0  8879    6982  8496  3100    283
## # … with 1,087,716 more rows, and 15 more variables: hydro <dbl>,
## #   biomass <dbl>, oil <dbl>, solar <dbl>, ocgt <dbl>, french_ict <dbl>,
## #   dutch_ict <dbl>, irish_ict <dbl>, ew_ict <dbl>, nemo <dbl>, other <dbl>,
## #   north_south <dbl>, scotland_england <dbl>, ifa2 <dbl>, nsl <dbl>
nrow(dat)
## [1] 1087726
# we don't really need data at 5 minute intervals, so summarise data to hourly
# we also don't really need all of the power sources at this point
# find median of  each 1 hour interval
# this also makes conversion  from MW to MWh very simple
dat_1hr <- dat %>%
  dplyr::select(timestamp, demand, wind, solar, nuclear, ccgt) %>%
  dplyr::mutate(ts_hour = lubridate::floor_date(timestamp, 'hours')) %>%
  dplyr::group_by(ts_hour) %>%
  dplyr::summarise_if(is.numeric, median, na.rm = TRUE) %>%
  dplyr::ungroup() %>%
  dplyr::mutate_if(
    is.numeric,
    ~ifelse(is.na(.), 0, .)
  ) 

#now have a nuch smaller dataset - 90k rows
dat_1hr
## # A tibble: 90,713 × 6
##    ts_hour             demand  wind solar nuclear  ccgt
##    <dttm>               <dbl> <dbl> <dbl>   <dbl> <dbl>
##  1 2012-01-01 00:00:00 31183  3045      0   7075  8460.
##  2 2012-01-01 01:00:00 30411  2984.     0   6942  8422 
##  3 2012-01-01 02:00:00 29320  2928.     0   6944. 8280 
##  4 2012-01-01 03:00:00 27427  2970      0   6946. 7957 
##  5 2012-01-01 04:00:00 26416. 3004.     0   7014  7560.
##  6 2012-01-01 05:00:00 25822. 2926      0   7116  7210 
##  7 2012-01-01 06:00:00 26032  2982.     0   7118  7307 
##  8 2012-01-01 07:00:00 25558. 2718.     0   7123  7292.
##  9 2012-01-01 08:00:00 24696. 2644.     0   7124  6989 
## 10 2012-01-01 09:00:00 26762. 2643      0   7124. 7314.
## # … with 90,703 more rows
nrow(dat_1hr)
## [1] 90713
# get some intuition about the data
dat_1hr %>%
  dplyr::filter(ts_hour > '2019-01-01', ts_hour < '2019-12-31') %>%
  ggplot(aes(ts_hour, demand)) + geom_line() + ylim(0,50000) + theme_bw()

dat_1hr %>%
  dplyr::filter(ts_hour > '2019-03-01', ts_hour < '2019-03-16') %>%
  ggplot(aes(ts_hour, demand)) + geom_line() + ylim(0,50000) + theme_bw()

# get some intuition about the data-  what does a year look like with demand and supply?
dat_1hr %>%
  dplyr::filter(ts_hour > '2019-01-01', ts_hour < '2019-12-31') %>%
  ggplot(aes(ts_hour, demand, color = 'black')) + 
    geom_line() + 
    geom_line(aes(y=ccgt), color = 'magenta', alpha = 0.3) +
    geom_line(aes(y=solar), color = 'orange', alpha  = 0.5) +
    geom_line(aes(y=wind), color = 'blue', alpha = 0.8) +
    geom_line(aes(y=nuclear), color = 'darkgrey') +
    ylim(0,50000) + 
    xlab('') + ylab('Generation/Demand MWh') +
    theme_bw() +
    scale_colour_manual(name = '', guide = 'legend',
                      values =c(
                        'demand' = 'black',
                        'wind'='blue',
                        'solar'='orange', 
                        'nuclear' = 'darkgrey', 
                        'ccgt' = 'magenta'))

# what about a month
dat_1hr %>%
  dplyr::filter(ts_hour > '2019-03-01', ts_hour < '2019-03-16') %>%
  ggplot(aes(ts_hour, demand, color = 'black')) + 
    geom_line() + 
    geom_line(aes(y=wind), color = 'blue') +
    geom_line(aes(y=solar), color = 'orange') +
    geom_line(aes(y=nuclear), color = 'darkgrey') +
    geom_line(aes(y=ccgt), color = 'magenta') +    
    ylim(0,50000) + 
    xlab('') + ylab('Generation/Demand MWh') +
    theme_bw() +
    scale_colour_manual(name = '', guide = 'legend',
                        values =c(
                          'demand' = 'black',
                          'wind'='blue',
                          'solar'='orange', 
                          'nuclear' = 'darkgrey', 
                          'ccgt' = 'magenta'))

# and finally - what about a low wind month?
dat_1hr %>%
  dplyr::filter(ts_hour > '2022-03-01', ts_hour < '2022-04-01') %>%
  ggplot(aes(ts_hour, demand, color = 'black')) + 
  geom_line() + 
  geom_line(aes(y=wind), color = 'blue') +
  geom_line(aes(y=solar), color = 'orange') +
  geom_line(aes(y=nuclear), color = 'darkgrey') +
  geom_line(aes(y=ccgt), color = 'magenta') +    
  ylim(0,50000) + 
  xlab('') + ylab('Generation/Demand MWh') +
  theme_bw()  +
  scale_colour_manual(name = '', guide = 'legend',
                      values =c(
                        'demand' = 'black',
                        'wind'='blue',
                        'solar'='orange', 
                        'nuclear' = 'darkgrey', 
                        'ccgt' = 'magenta'))

# note that 'demand' in gridwatch data is affected by unmetered wind a solar 
# this shows up as reductions in demand - eg household solar reduces that household's requirements

# so far we have seen that demand is variable - day/night, weekend vs weekday, winter vs summer.
# solar and wind are also variable, but gas seems to be able to ramp up and down around demand/RE variablility

# the next step of the analysis is to see what would happen with different grids
# to do this we need to normalise the wind and solar output - ie what would it have looked like if we 
# hadn't been building more over time.

# simplest way to do this is to use the cumulative maximum for wind and solar

# find max of each 1 hour interval and then cumulative max for wind and solar
max_vargen <- dat %>%
  dplyr::select(timestamp, demand, wind, solar) %>%
  dplyr::filter(solar < 100000) %>%  # gets rid of two rogue rows
  dplyr::mutate(ts_hour = lubridate::floor_date(timestamp, 'hours')) %>%
  dplyr::group_by(ts_hour) %>%
  dplyr::summarise_if(is.numeric, max, na.rm = TRUE) %>%
  dplyr::ungroup() %>%
  dplyr::mutate_if(
    is.numeric,
    ~ifelse(is.na(.), 0, .)
  ) %>%
  mutate(max_wind = cummax(wind),
         max_solar = cummax(solar)) %>%
  dplyr::select(ts_hour, max_wind, max_solar)
max_vargen
## # A tibble: 90,713 × 3
##    ts_hour             max_wind max_solar
##    <dttm>                 <dbl>     <dbl>
##  1 2012-01-01 00:00:00     3100         0
##  2 2012-01-01 01:00:00     3116         0
##  3 2012-01-01 02:00:00     3116         0
##  4 2012-01-01 03:00:00     3116         0
##  5 2012-01-01 04:00:00     3116         0
##  6 2012-01-01 05:00:00     3116         0
##  7 2012-01-01 06:00:00     3116         0
##  8 2012-01-01 07:00:00     3116         0
##  9 2012-01-01 08:00:00     3116         0
## 10 2012-01-01 09:00:00     3116         0
## # … with 90,703 more rows
# combine median and max wind/solar so we can calculate % wind/solar
# this will be an overestimate of % since we will be likely be undestimating wind/solar capacity
# but in addition mix and location of wind has changed over time -> high capacity factors
# lets us normalise wind and solar output against max capacity available at time
normalised_vargen <- dat_1hr %>% 
  inner_join(max_vargen, by='ts_hour') %>%
  mutate(pct_wind =  wind/max_wind,
         pct_solar = solar/max_solar)

#plot normalised  wind output
ggplot(normalised_vargen %>% dplyr::filter(ts_hour > '2013-01-01'), aes(ts_hour, wind)) + 
  geom_line(aes(y=max_wind), color = 'black', linetype = 'dashed')  + 
  geom_line(color='black')  +
  theme_bw() +
  xlab('') + ylab('Generation MWh') +
  theme_bw()  

ggplot(normalised_vargen %>% dplyr::filter(ts_hour > '2013-01-01'), aes(ts_hour, pct_wind)) + 
  geom_line()  + theme_bw()

# plot normalised solar
ggplot(normalised_vargen %>% dplyr::filter(ts_hour > '2017-04-01'), aes(ts_hour, solar)) + 
  geom_line(aes(y=max_solar))  + 
  geom_line()  +
  theme_bw()

ggplot(normalised_vargen %>% dplyr::filter(ts_hour > '2017-04-01'), aes(ts_hour, pct_solar)) + geom_line()  + theme_bw()

# model 1 - simple model with just wind, gas, solar, no capacity constraints or overgeneration

#specify wind, solar and gas capacity
wind_cap_m1 <- 20000
gas_cap_m1 <- 50000
solar_cap_m1 <- 10000

# scaleup wind and solar to model capacity, then let gas fill in the gaps
model1 <- normalised_vargen %>%
  dplyr::filter(ts_hour > '2017-04-01') %>%
  dplyr::select(ts_hour, demand, pct_wind, pct_solar) %>%
  dplyr::mutate(solar = pct_solar * solar_cap_m1,
                wind = pct_wind * wind_cap_m1,
                gas = demand - wind - solar)

model1 %>%
  dplyr::filter(ts_hour > '2022-03-01', ts_hour < '2022-04-01') %>%
  ggplot(aes(ts_hour, demand, color = 'black')) + 
  geom_line() + 
  geom_line(aes(y=wind), color = 'blue') +
  geom_line(aes(y=solar), color = 'orange') +
  geom_line(aes(y=gas), color = 'magenta') +    
  ylim(0,50000) + 
  xlab('') + ylab('Generation/Demand MWh') +
  theme_bw()  +
  scale_colour_manual(name = '', guide = 'legend',
                      values =c(
                        'demand' = 'black',
                        'wind'='blue',
                        'solar'='orange', 
                        'nuclear' = 'darkgrey', 
                        'gas' = 'magenta'))

# model 2 - add curtailment and excess
# need this model to look at what happens when we add more wind to the system.
# what happens when we have 'too much' wind for system to cope with?
# reach point of diminishing returns- most of new wind output curtailed but still have wind lull problem
# EV's could help by soaking up excess, interconnectors could export/import
# but this is the point where storage starts to help us use more of our otherwise curtailed wind
wind_cap_m2 <- 60000
gas_cap_m2 <- 50000
solar_cap_m2 <- 20000


model2 <- normalised_vargen %>%
  dplyr::filter(ts_hour > '2017-04-01') %>%
  dplyr::select(ts_hour, demand, pct_wind, pct_solar) %>%
  dplyr::mutate(solar = pct_solar * solar_cap_m2,
                wind = pct_wind * wind_cap_m2,
                gas = pmax(pmin(demand - wind - solar, gas_cap_m2), 0),
                deficit = pmax(demand - wind - solar - gas, 0),
                curtailment = pmax(wind + solar + gas - demand, 0))

model2 %>%
  dplyr::filter(ts_hour > '2021-01-01', ts_hour < '2022-01-01') %>%
  ggplot(aes(ts_hour, demand, color = 'black')) + 
  geom_line() + 
  geom_line(aes(y=wind), color = 'blue') +
  geom_line(aes(y=solar), color = 'orange') +
  geom_line(aes(y=gas), color = 'magenta') + 
  geom_line(aes(y=-curtailment), color = 'red')  +
  xlab('') + ylab('Generation/Demand MWh') +
  theme_bw()  +
  scale_colour_manual(name = '', guide = 'legend',
                      values =c(
                        'demand' = 'black',
                        'wind'='blue',
                        'solar'='orange', 
                        'nuclear' = 'darkgrey', 
                        'gas' = 'magenta',
                        'curtailment' = 'red'))

model2 %>%
  dplyr::filter(ts_hour > '2021-01-01', ts_hour < '2022-01-01') %>%
  dplyr::summarise_at(c('wind', 'solar', 'gas', 'curtailment', 'deficit'), ~sum(., na.rm = TRUE)/1000000)
## # A tibble: 1 × 5
##    wind solar   gas curtailment deficit
##   <dbl> <dbl> <dbl>       <dbl>   <dbl>
## 1  207.  21.6  77.0        45.8       0
model2 %>%
  dplyr::filter(ts_hour > '2022-03-01', ts_hour < '2022-04-01') %>%
  ggplot(aes(ts_hour, demand, color = 'black')) + 
  geom_line() + 
  geom_line(aes(y=wind), color = 'blue') +
  geom_line(aes(y=solar), color = 'orange') +
  geom_line(aes(y=gas), color = 'magenta') + 
  geom_line(aes(y=-curtailment), color = 'red')  +
  xlab('') + ylab('Generation/Demand MWh') +
  theme_bw()  +
  scale_colour_manual(name = '', guide = 'legend',
                      values =c(
                        'demand' = 'black',
                        'wind'='blue',
                        'solar'='orange', 
                        'nuclear' = 'darkgrey', 
                        'gas' = 'magenta',
                        'curtailment' = 'red'))

# now let's try to do the same thing but across a range of wind capacity

model2_fn <- function(df, start_date, end_date, wind_cap, gas_cap, solar_cap) {
  
  df %>%
    dplyr::filter(ts_hour >= start_date, ts_hour <= end_date) %>%
    dplyr::select(ts_hour, demand, pct_wind, pct_solar) %>%
    dplyr::mutate(solar = pct_solar * solar_cap,
                  wind = pct_wind * wind_cap,
                  gas = pmax(pmin(demand - wind - solar, gas_cap), 0),
                  deficit = pmax(demand - wind - solar - gas, 0),
                  curtailment = pmax(wind + solar + gas - demand, 0)) %>%
    dplyr::summarise_at(c('wind', 'solar', 'gas', 'curtailment', 'deficit'), ~sum(., na.rm = TRUE)/1000000)
  
}

normalised_vargen %>%
  model2_fn(
    start_date = '2017-01-01',
    end_date = '2021-12-31',
    wind_cap = 20000,
    gas_cap = 50000,
    solar_cap = 20000)
## # A tibble: 1 × 5
##    wind solar   gas curtailment deficit
##   <dbl> <dbl> <dbl>       <dbl>   <dbl>
## 1  369.  117.  846.        1.31       0
# do a full simulation with different amounts of wind and gas
sim_df <- tibble(
  input_data = list(normalised_vargen),
  start_date = '2017-01-01',
  end_date = '2021-12-31',
  solar_cap = 20000
) %>% tidyr::crossing(
  wind_cap = c(10000,20000, 40000, 80000, 120000),
  gas_cap = c(20000, 35000, 50000)
) %>%
  dplyr::mutate(
    sim_out = purrr::pmap(
      .f=model2_fn,
      .l=list(
        df=input_data, 
        start_date=start_date, 
        end_date=end_date, 
        wind_cap=wind_cap, 
        solar_cap=solar_cap, 
        gas_cap=gas_cap
        )
      )
    ) %>%
  tidyr::unnest(sim_out)

sim_df
## # A tibble: 15 × 11
##    input_data start_date end_date   solar_cap wind_cap gas_cap  wind solar   gas
##    <list>     <chr>      <chr>          <dbl>    <dbl>   <dbl> <dbl> <dbl> <dbl>
##  1 <tibble>   2017-01-01 2021-12-31     20000    10000   20000  185.  117.  808.
##  2 <tibble>   2017-01-01 2021-12-31     20000    10000   35000  185.  117. 1016.
##  3 <tibble>   2017-01-01 2021-12-31     20000    10000   50000  185.  117. 1029.
##  4 <tibble>   2017-01-01 2021-12-31     20000    20000   20000  369.  117.  713.
##  5 <tibble>   2017-01-01 2021-12-31     20000    20000   35000  369.  117.  840.
##  6 <tibble>   2017-01-01 2021-12-31     20000    20000   50000  369.  117.  846.
##  7 <tibble>   2017-01-01 2021-12-31     20000    40000   20000  738.  117.  465.
##  8 <tibble>   2017-01-01 2021-12-31     20000    40000   35000  738.  117.  523.
##  9 <tibble>   2017-01-01 2021-12-31     20000    40000   50000  738.  117.  525.
## 10 <tibble>   2017-01-01 2021-12-31     20000    80000   20000 1476.  117.  231.
## 11 <tibble>   2017-01-01 2021-12-31     20000    80000   35000 1476.  117.  252.
## 12 <tibble>   2017-01-01 2021-12-31     20000    80000   50000 1476.  117.  253.
## 13 <tibble>   2017-01-01 2021-12-31     20000   120000   20000 2214.  117.  141.
## 14 <tibble>   2017-01-01 2021-12-31     20000   120000   35000 2214.  117.  152.
## 15 <tibble>   2017-01-01 2021-12-31     20000   120000   50000 2214.  117.  152.
## # … with 2 more variables: curtailment <dbl>, deficit <dbl>
# what does wind, solar, gas generaiton look like
sim_df %>%
  dplyr::select(wind_cap, gas_cap, wind, solar, gas) %>%
  tidyr::pivot_longer(cols = c('wind', 'solar', 'gas'), names_to = 'source', values_to = 'TWh') %>%
  ggplot(aes(as.factor(wind_cap/1000), TWh, fill=source)) + 
    geom_bar(stat='identity') + 
    facet_wrap(~gas_cap/1000, labeller = labeller(.default = function(val) {paste0(val, ' GW Gas')})) + 
    xlab('Total Wind Capacity (GW)') +
    theme_bw() +
    scale_fill_manual(name = '', guide = 'legend',
                      values =c(
                      #  'demand' = 'black',
                        'wind'='blue',
                        'solar'='orange', 
                       # 'nuclear' = 'darkgrey', 
                        'gas' = 'magenta'
                      #  'curtailment' = 'red',
                       # 'deficit' = 'purple'
                      ))

#what about deficit and curtailment
sim_df %>%
  dplyr::transmute(wind_cap, gas_cap, curtailment, deficit = -deficit) %>%
  tidyr::pivot_longer(cols = c('curtailment', 'deficit'), names_to = 'source', values_to = 'TWh') %>%
  ggplot(aes(as.factor(wind_cap/1000), TWh, fill=source)) + 
  geom_bar(stat='identity') + 
  facet_wrap(~gas_cap/1000, labeller = labeller(.default = function(val) {paste0(val, ' GW Gas')})) + 
  xlab('Total Wind Capacity (GW)') +
  theme_bw() +
  scale_fill_manual(name = '', guide = 'legend',
                    values =c(
                      #  'demand' = 'black',
                      #'wind'='blue',
                      #'solar'='orange', 
                      # 'nuclear' = 'darkgrey', 
                      #'gas' = 'magenta'
                        'curtailment' = 'red',
                       'deficit' = 'purple'
                    ))

# finally consider amount of gas, deficit and curtailment
sim_df %>%
  dplyr::transmute(wind_cap, gas_cap, curtailment, gas, deficit=-deficit) %>%
  tidyr::pivot_longer(cols = c('curtailment',  'gas', 'deficit'), names_to = 'source', values_to = 'TWh') %>%
  ggplot(aes(as.factor(wind_cap/1000), TWh, fill=source)) + 
  geom_bar(stat='identity') + 
  facet_wrap(~gas_cap/1000, labeller = labeller(.default = function(val) {paste0(val, ' GW Gas')})) + 
  xlab('Total Wind Capacity (GW)') +
  theme_bw() +
  scale_fill_manual(name = '', guide = 'legend',
                    values =c(
                      #  'demand' = 'black',
                      #'wind'='blue',
                      #'solar'='orange', 
                      # 'nuclear' = 'darkgrey', 
                      'gas' = 'magenta',
                      'curtailment' = 'red',
                      'deficit' = 'purple'
                    ))